Packages

require(tidyr)
require(dplyr)
require(magrittr)
require(ggplot2)
require(bayesplot)
require(ape)
require(rstan)
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
require(brms)
require(ggmcmc)
require(knitr)
require(phytools)
require(cowplot)

# RMSE function
RMSE = function(m, o){
  sqrt(mean((m - o)^2))
}

Loading data

data <- read.csv("./sp.rare.inv.csv", as.is=F)
# data <- read.csv("./sp.rare.inv2.csv", as.is=F)
# data <- read.csv("./sp.rare.inv.imp.csv", as.is=F)
# data %>% purrr::keep(is.numeric) %>% cor(., use="complete.obs") %>% View()

# one family has more rare species than species
data[data$rare>data$species,]

# increasing species by 1 to match
data$species[data$family=="Metteniusaceae"] <- 8

range(data$vetted)
nrow(data[data$vetted==0,])

# creating column for woodiness
data$woodiness <- data$herbaceous + data$woody + data$variable

# fixing typo in names 
names(data)[names(data)%in%"dmonoecious"] <- "monoecious"

### Zanne tree & generating tree representing families
tree <- read.tree("./Vascular_Plants_rooted.dated.tre")
spec_fam <- read.csv("./spec.fam.csv", as.is=T)
tree <- drop.tip(tree, setdiff(tree$tip.label,as.character(spec_fam$sp)))
lookup <- setNames(spec_fam$family, spec_fam$sp)
tree$tip.label <- as.character(lookup[tree$tip.label])

# Getting new family age
min_above_zero <- function(x){

  if (length(x)>1) {
    min(x[x>0])
  } else {0}

}

# Calculate family age (may be influenced by families missing from the tree)
coph <- cophenetic(tree)

fam_age <- sapply(rownames(coph), function(fam) 
          min_above_zero(coph[rownames(coph)%in%fam,])
          ) 
# old_ages <- data %>% select(family, age)
new_ages <- data.frame(family=names(fam_age), new_age=as.numeric(fam_age)/2)
new_ages %<>% arrange(family)
data <- left_join(data,new_ages)

# There are some ages of NA (not in tree?)
# Remove these
data <- data[!is.na(data$new_age),]

# Calculating simple diversification metric - log(N+1)/age
data$diversification <- with(data, log(species)/new_age)

# creating another family level variable (for non-phylogenetic family level effects)
data$family_name <- data$family

# These families we have data for, but are not in tree
setdiff(data$family, tree$tip.label)
# intersect(data$family, tree$tip.label)

# remove missing families in data
fdata <- data[data$family%in%tree$tip.label,]

# phylogenetic correlation structure
phy_cov <- vcv(tree, corr=TRUE)


# Scaling predictors 

# Code for scale and unscale with mean=0 sd=0.5 (following Gelman 2008 for logistic regression)

scale_half <- function(x){
  return( (x-mean(x)) * 0.5/sd(x))
}

unscale_half <- function(x,original){
    x*sd(original)*2 + mean(original)  
}

# Scaling continuous predictors
data$new_age_scaled <- scale_half(log(data$new_age))
data$species_scaled <- scale_half(log(data$species))
data$diversification_scaled <- scale_half(log(data$diversification+1))
data$Naturalised_scaled <- scale_half(log(data$Naturalised+1))

# Subsetting to IUCN vetted species (for rarity models)
data_vetted <- data[data$vetted>0,]
tree_vetted <- drop.tip(tree, setdiff(tree$tip.label, as.character(data_vetted$family)))
phy_cov_vetted <- vcv(tree_vetted, corr=TRUE)


# vifs 
require(car)

vif(lm(rare ~ Naturalised_scaled + diversification_scaled + new_age_scaled, data=data_vetted))

vif(lm(rare ~ diversification_scaled + new_age_scaled + woodiness + herbaceous + climate.sum + species.hybrids + hermaphroditic + dioecious + monoecious + asexual + fleshy.fruit + animal, data=data_vetted))

   

Rarity models (with IUCN evaluated species)

Simple rarity model

if (!file.exists("./vet_rare_naturalized_10k.rds")) {

  vet_rare_nat <- brm(rare | trials(vetted) ~ Naturalised_scaled + diversification_scaled + new_age_scaled +  (1|family) + (1|family_name), 
    dat=data_vetted, family=binomial(), 
    iter=10000, thin=4, cov_ranef = list(family = phy_cov_vetted),
    control=list(adapt_delta=0.95,max_treedepth=12), cores=4)

  saveRDS(vet_rare_nat, "./vet_rare_naturalized_10k.rds")

} else { vet_rare_nat <- readRDS("./vet_rare_naturalized_10k.rds")}

# vet_rare_nat
# pairs(vet_rare_nat)

outcome <- summary(vet_rare_nat$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]

names1 <- c("Number of naturalized species","Diversification rate","Family age","SD brownian effects","SD family specific effects")

vet_rare_nat_plot <- stanplot(vet_rare_nat, pars=rownames(to_print), type="intervals", 
                    prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print)))
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
vet_rare_nat_plot

pdf("./plots_tables/vet_rare_nat_plot.pdf", width=4, height=3)
vet_rare_nat_plot
dev.off()
## png 
##   2
rownames(to_print) <- names1
kable(to_print, digits=2)
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
Number of naturalized species -0.70 0.00 0.19 -1.07 -0.83 -0.70 -0.57 -0.34 4362.70 1.00
Diversification rate 1.20 0.00 0.31 0.61 0.99 1.20 1.41 1.81 4519.84 1.00
Family age 1.01 0.00 0.32 0.38 0.80 1.01 1.22 1.63 4402.76 1.00
SD brownian effects 1.00 0.01 0.21 0.62 0.86 1.00 1.14 1.43 853.74 1.00
SD family specific effects 0.61 0.01 0.15 0.22 0.54 0.64 0.72 0.86 538.28 1.01
bayes_R2(vet_rare_nat)
##     Estimate    Est.Error      Q2.5    Q97.5
## R2 0.9983447 0.0003910453 0.9973925 0.998945
##Posterior predictive checks
# first make posterior predictions
pp_vet_rare_nat <- brms::posterior_predict(vet_rare_nat)

log1 <- scale_x_continuous(trans="log1p")

ppc_dens_overlay(y=vet_rare_nat$data$rare, pp_vet_rare_nat[1:50, ]) + scale_x_continuous(trans="log1p")

# RMSE
rmse_vet_rare_nat <- apply(pp_vet_rare_nat, 1, function(x) RMSE(x,vet_rare_nat$data$rare))
# plot(density(rmse_vet_rare_nat), main="RMSE across posterior predictions")
mean(rmse_vet_rare_nat)
## [1] 6.178086
sd(rmse_vet_rare_nat)
## [1] 0.7116806
# LOO
loo(vet_rare_nat)
## Warning: Found 145 observations with a pareto_k > 0.7 in model
## 'vet_rare_nat'. With this many problematic observations, it may be more
## appropriate to use 'kfold' with argument 'K = 10' to perform 10-fold cross-
## validation rather than LOO.
## 
## Computed from 5000 by 259 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo   -671.2 18.9
## p_loo       164.0  6.0
## looic      1342.4 37.9
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)      56   21.6%   1009      
##  (0.5, 0.7]   (ok)        58   22.4%   200       
##    (0.7, 1]   (bad)      116   44.8%   20        
##    (1, Inf)   (very bad)  29   11.2%   7         
## See help('pareto-k-diagnostic') for details.

IUCN vetted full rarity model

if (!file.exists("./vet_rare_10k.rds")) {

  vet_rare <- brm(rare | trials(vetted) ~ diversification_scaled + new_age_scaled + woodiness + herbaceous + climate.sum + species.hybrids + hermaphroditic + dioecious + monoecious + asexual + fleshy.fruit + animal + (1|family) + (1|family_name), 
    dat=data_vetted, family=binomial(), 
    iter=10000, thin=4, cov_ranef = list(family = phy_cov_vetted),
    control=list(adapt_delta=0.95,max_treedepth=13))

  saveRDS(vet_rare, "./vet_rare_10k.rds")

} else { vet_rare <- readRDS("./vet_rare_10k.rds")}

# vet_rare

outcome <- summary(vet_rare$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]

names1 <- c("Diversification rate","Famly age","Woodiness","Herbaceous","Climate sum","Number of species hybrids","Hermaphroditic","Dioecious","Monoecious","Asexual","Fleshy fruit","Animal pollinated","SD brownian effects","SD family specific effects")

vet_rare_plot <- stanplot(vet_rare, pars=rownames(to_print), type="intervals", 
                    prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print))) + ggtitle("Proportion rare species")
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
vet_rare_plot

pdf("./plots_tables/vet_rare_plot.pdf", width=4, height=7)
vet_rare_plot
dev.off()
## png 
##   2
rownames(to_print) <- names1
kable(to_print, digits=2)
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
Diversification rate 0.46 0.01 0.34 -0.23 0.24 0.47 0.70 1.11 4629.61 1
Famly age 0.41 0.00 0.31 -0.21 0.20 0.42 0.62 1.01 3959.66 1
Woodiness 0.63 0.00 0.14 0.35 0.54 0.63 0.73 0.91 4465.82 1
Herbaceous -1.08 0.00 0.23 -1.52 -1.24 -1.09 -0.92 -0.61 2553.18 1
Climate sum -0.28 0.00 0.10 -0.47 -0.34 -0.28 -0.21 -0.09 4502.17 1
Number of species hybrids 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 4573.29 1
Hermaphroditic 0.17 0.00 0.23 -0.29 0.01 0.16 0.33 0.63 4412.89 1
Dioecious -0.15 0.00 0.16 -0.46 -0.26 -0.15 -0.04 0.15 4776.48 1
Monoecious -0.11 0.00 0.16 -0.43 -0.22 -0.11 -0.01 0.20 4729.80 1
Asexual 0.02 0.00 0.17 -0.31 -0.09 0.02 0.13 0.34 4575.64 1
Fleshy fruit 0.04 0.00 0.15 -0.26 -0.06 0.04 0.15 0.35 4665.84 1
Animal pollinated 0.60 0.00 0.26 0.10 0.42 0.60 0.77 1.10 4267.24 1
SD brownian effects 0.51 0.01 0.29 0.03 0.29 0.51 0.72 1.09 612.16 1
SD family specific effects 0.75 0.00 0.12 0.46 0.68 0.76 0.83 0.94 803.69 1
bayes_R2(vet_rare)
##     Estimate    Est.Error      Q2.5     Q97.5
## R2 0.9983752 0.0003806278 0.9974841 0.9989616
##Posterior predictive checks
# first make posterior predictions
pp_rare <- brms::posterior_predict(vet_rare)

log1 <- scale_x_continuous(trans="log1p")

ppc_dens_overlay(y=vet_rare$data$rare, pp_rare[1:50, ]) + scale_x_continuous(trans="log1p")

# RMSE
rmse_vet <- apply(pp_rare, 1, function(x) RMSE(x,vet_rare$data$rare))
# plot(density(rmse_vet), main="RMSE across posterior predictions")
mean(rmse_vet)
## [1] 6.31501
sd(rmse_vet)
## [1] 0.7443097
# LOO
loo(vet_rare)
## Warning: Found 131 observations with a pareto_k > 0.7 in model 'vet_rare'.
## With this many problematic observations, it may be more appropriate to use
## 'kfold' with argument 'K = 10' to perform 10-fold cross-validation rather
## than LOO.
## 
## Computed from 5000 by 243 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo   -633.7 19.3
## p_loo       155.6  6.6
## looic      1267.4 38.5
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)      53   21.8%   1160      
##  (0.5, 0.7]   (ok)        59   24.3%   209       
##    (0.7, 1]   (bad)      107   44.0%   21        
##    (1, Inf)   (very bad)  24    9.9%   5         
## See help('pareto-k-diagnostic') for details.

IUCN vetted rarity model: no family level effects

if (!file.exists("./vet_rare_nofam_10k.rds")) {

  vet_rare_nofam <- brm(rare | trials(vetted) ~ diversification_scaled + new_age_scaled + woodiness + herbaceous + climate.sum + species.hybrids + hermaphroditic + dioecious + monoecious + asexual + fleshy.fruit + animal, 
    dat=data_vetted, family=binomial(), 
    iter=10000, thin=4,
    control=list(adapt_delta=0.95,max_treedepth=13))

  saveRDS(vet_rare_nofam, "./vet_rare_nofam_10k.rds")

} else { vet_rare_nofam <- readRDS("./vet_rare_nofam_10k.rds")}
## Warning: Rows containing NAs were excluded from the model.
## Compiling the C++ model
## Start sampling
# vet_rare_nofam

outcome <- summary(vet_rare_nofam$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]

names1 <- c("Diversification rate","Famly age", "Woodiness","Herbaceous","Climate sum","Number of species hybrids","Hermaphroditic","Dioecious","Monoecious","Asexual","Fleshy fruit","Animal pollinated")

vet_rare_nofam_plot <- stanplot(vet_rare_nofam, pars=rownames(to_print), type="intervals", 
                    prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print))) + ggtitle("Proportion rare species")
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
rownames(to_print) <- names1
kable(to_print, digits=2)
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
Diversification rate 0.06 0 0.10 -0.13 0.00 0.06 0.13 0.25 4939.75 1
Famly age 0.06 0 0.09 -0.13 0.00 0.06 0.12 0.24 5070.41 1
Woodiness 0.52 0 0.03 0.45 0.49 0.51 0.54 0.58 4504.94 1
Herbaceous -0.67 0 0.05 -0.77 -0.71 -0.67 -0.64 -0.57 4674.90 1
Climate sum -0.42 0 0.02 -0.47 -0.44 -0.42 -0.40 -0.37 4854.91 1
Number of species hybrids 0.00 0 0.00 0.00 0.00 0.00 0.00 0.00 4946.06 1
Hermaphroditic 0.30 0 0.06 0.18 0.26 0.30 0.34 0.42 4697.34 1
Dioecious 0.14 0 0.03 0.07 0.12 0.14 0.16 0.21 4622.04 1
Monoecious -0.15 0 0.04 -0.23 -0.18 -0.15 -0.13 -0.08 4840.76 1
Asexual 0.14 0 0.04 0.07 0.12 0.14 0.16 0.21 5070.97 1
Fleshy fruit -0.22 0 0.02 -0.26 -0.23 -0.22 -0.20 -0.17 4533.41 1
Animal pollinated 0.97 0 0.06 0.85 0.93 0.97 1.01 1.09 4877.15 1
vet_rare_nofam_plot

pdf("./plots_tables/vet_rare_nofam_plot.pdf", width=4, height=7)
vet_rare_nofam_plot
dev.off()
## png 
##   2
bayes_R2(vet_rare_nofam)
##     Estimate   Est.Error      Q2.5     Q97.5
## R2 0.9595333 0.002023691 0.9553965 0.9633137
##Posterior predictive checks
# first make posterior predictions
pp_rare_nofam <- brms::posterior_predict(vet_rare_nofam)

log1 <- scale_x_continuous(trans="log1p")

ppc_dens_overlay(y=vet_rare_nofam$data$rare, pp_rare_nofam[1:50, ]) + scale_x_continuous(trans="log1p")

# RMSE

rmse_rare_nofam <- apply(pp_rare_nofam, 1, function(x) RMSE(x,vet_rare_nofam$data$rare))
plot(density(rmse_rare_nofam), main="RMSE across posterior predictions")

mean(rmse_rare_nofam)
## [1] 24.24252
sd(rmse_rare_nofam)
## [1] 1.362858
# LOO
loo(vet_rare_nofam)
## Warning: Found 16 observations with a pareto_k > 0.7 in model
## 'vet_rare_nofam'. With this many problematic observations, it may be more
## appropriate to use 'kfold' with argument 'K = 10' to perform 10-fold cross-
## validation rather than LOO.
## 
## Computed from 5000 by 243 log-likelihood matrix
## 
##          Estimate    SE
## elpd_loo  -1490.9 164.5
## p_loo       211.1  63.3
## looic      2981.8 329.0
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     216   88.9%   288       
##  (0.5, 0.7]   (ok)        11    4.5%   121       
##    (0.7, 1]   (bad)        8    3.3%   16        
##    (1, Inf)   (very bad)   8    3.3%   1         
## See help('pareto-k-diagnostic') for details.

IUCN vetted rarity model: only non-brownian family effects

if (!file.exists("./vet_rare_nophylo_10k.rds")) {

  vet_rare_nophylo <- brm(rare | trials(vetted) ~ diversification_scaled + new_age_scaled + woodiness + herbaceous + climate.sum + species.hybrids + hermaphroditic + dioecious + monoecious + asexual + fleshy.fruit + animal + (1|family_name), 
    dat=data_vetted, family=binomial(), 
    iter=10000, thin=4,
    control=list(adapt_delta=0.95,max_treedepth=13))

  saveRDS(vet_rare_nophylo, "./vet_rare_nophylo_10k.rds")

} else { vet_rare_nophylo <- readRDS("./vet_rare_nophylo_10k.rds")}
## Warning: Rows containing NAs were excluded from the model.
## Compiling the C++ model
## Start sampling
# vet_rare_nophylo

outcome <- summary(vet_rare_nophylo$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]

names1 <- c("Diversification rate","Famly age", "Woodiness","Herbaceous","Climate sum","Number of species hybrids","Hermaphroditic","Dioecious","Monoecious","Asexual","Fleshy fruit","Animal pollinated","SD family specific effects")

vet_rare_nophylo_plot <- stanplot(vet_rare_nophylo, pars=rownames(to_print), type="intervals", 
                    prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print))) + ggtitle("Proportion rare species")
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
rownames(to_print) <- names1
kable(to_print, digits=2)
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
Diversification rate 0.49 0.01 0.34 -0.19 0.27 0.49 0.72 1.14 3008.71 1
Famly age 0.51 0.01 0.30 -0.10 0.31 0.51 0.70 1.10 3226.27 1
Woodiness 0.66 0.00 0.12 0.42 0.58 0.66 0.75 0.91 3433.68 1
Herbaceous -1.19 0.00 0.19 -1.57 -1.32 -1.19 -1.06 -0.83 3513.24 1
Climate sum -0.29 0.00 0.10 -0.48 -0.35 -0.29 -0.22 -0.10 3204.75 1
Number of species hybrids 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 4305.33 1
Hermaphroditic 0.16 0.00 0.23 -0.28 0.01 0.16 0.32 0.61 3335.70 1
Dioecious -0.15 0.00 0.16 -0.46 -0.26 -0.15 -0.05 0.15 3225.54 1
Monoecious -0.12 0.00 0.16 -0.42 -0.23 -0.12 -0.01 0.19 3168.32 1
Asexual 0.04 0.00 0.16 -0.28 -0.07 0.04 0.14 0.35 2925.31 1
Fleshy fruit 0.05 0.00 0.15 -0.25 -0.05 0.05 0.16 0.36 4062.99 1
Animal pollinated 0.67 0.00 0.24 0.21 0.52 0.67 0.83 1.14 3827.89 1
SD family specific effects 0.85 0.00 0.06 0.74 0.81 0.85 0.89 0.98 3620.69 1
vet_rare_nophylo_plot

pdf("./plots_tables/vet_rare_nophylo_plot.pdf", width=4, height=7)
vet_rare_nophylo_plot
dev.off()
## png 
##   2
bayes_R2(vet_rare_nophylo)
##     Estimate   Est.Error      Q2.5     Q97.5
## R2 0.9983705 0.000393171 0.9974769 0.9989715
##Posterior predictive checks
# first make posterior predictions
pp_rare_nophylo <- brms::posterior_predict(vet_rare_nophylo)

log1 <- scale_x_continuous(trans="log1p")

ppc_dens_overlay(y=vet_rare_nophylo$data$rare, pp_rare_nophylo[1:50, ]) + scale_x_continuous(trans="log1p")

# RMSE
rmse_rare_nophylo <- apply(pp_rare_nophylo, 1, function(x) RMSE(x,vet_rare_nophylo$data$rare))
plot(density(rmse_rare_nophylo), main="RMSE across posterior predictions")

mean(rmse_rare_nophylo)
## [1] 6.325529
sd(rmse_rare_nophylo)
## [1] 0.76199
# LOO
loo(vet_rare_nophylo)
## Warning: Found 130 observations with a pareto_k > 0.7 in model
## 'vet_rare_nophylo'. With this many problematic observations, it may be more
## appropriate to use 'kfold' with argument 'K = 10' to perform 10-fold cross-
## validation rather than LOO.
## 
## Computed from 5000 by 243 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo   -631.9 19.2
## p_loo       153.7  6.6
## looic      1263.9 38.3
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)      61   25.1%   742       
##  (0.5, 0.7]   (ok)        52   21.4%   155       
##    (0.7, 1]   (bad)      110   45.3%   24        
##    (1, Inf)   (very bad)  20    8.2%   6         
## See help('pareto-k-diagnostic') for details.

IUCN vetted rarity model: only brownian family effects

if (!file.exists("./vet_rare_phylo_10k.rds")) {

  vet_rare_phylo <- brm(rare | trials(vetted) ~ diversification_scaled + new_age_scaled + woodiness + herbaceous + climate.sum + species.hybrids + hermaphroditic + dioecious + monoecious + asexual + fleshy.fruit + animal+ (1|family), 
    dat=data_vetted, family=binomial(), 
    iter=10000, thin=4, cov_ranef = list(family = phy_cov),
    control=list(adapt_delta=0.95,max_treedepth=13))

  saveRDS(vet_rare_phylo, "./vet_rare_phylo_10k.rds")

} else { vet_rare_phylo <- readRDS("./vet_rare_phylo_10k.rds")}
## Warning: Rows containing NAs were excluded from the model.
## Compiling the C++ model
## Start sampling
# vet_rare_phylo

outcome <- summary(vet_rare_phylo$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]

names1 <- c("Diversification rate","Famly age", "Woodiness","Herbaceous","Climate sum","Number of species hybrids","Hermaphroditic","Dioecious","Monoecious","Asexual","Fleshy fruit","Animal pollinated","SD brownian effects")

vet_rare_phylo_plot <- stanplot(vet_rare_phylo, pars=rownames(to_print), type="intervals", 
                    prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print))) + ggtitle("Proportion rare species")
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
rownames(to_print) <- names1
kable(to_print, digits=2)
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
Diversification rate 0.53 0.01 0.34 -0.12 0.30 0.53 0.75 1.20 4123.41 1
Famly age 0.31 0.01 0.34 -0.37 0.08 0.32 0.53 0.99 4024.22 1
Woodiness 0.59 0.00 0.14 0.31 0.50 0.59 0.68 0.86 3679.34 1
Herbaceous -0.93 0.00 0.23 -1.38 -1.08 -0.93 -0.78 -0.49 3858.38 1
Climate sum -0.26 0.00 0.10 -0.44 -0.32 -0.26 -0.19 -0.07 4016.73 1
Number of species hybrids 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 4554.56 1
Hermaphroditic 0.13 0.00 0.23 -0.32 -0.02 0.13 0.28 0.57 4164.96 1
Dioecious -0.16 0.00 0.15 -0.45 -0.26 -0.15 -0.06 0.14 3712.41 1
Monoecious -0.06 0.00 0.15 -0.37 -0.17 -0.06 0.05 0.23 3656.72 1
Asexual -0.01 0.00 0.16 -0.33 -0.12 -0.01 0.10 0.30 3761.79 1
Fleshy fruit -0.07 0.00 0.12 -0.31 -0.16 -0.07 0.01 0.17 4049.59 1
Animal pollinated 0.49 0.00 0.26 -0.01 0.32 0.49 0.67 1.01 3867.06 1
SD brownian effects 1.33 0.00 0.10 1.14 1.26 1.32 1.39 1.53 3845.59 1
vet_rare_phylo_plot

pdf("./plots_tables/vet_rare_phylo_plot.pdf", width=4, height=7)
vet_rare_phylo_plot
dev.off()
## png 
##   2
bayes_R2(vet_rare_phylo)
##     Estimate    Est.Error      Q2.5     Q97.5
## R2 0.9983544 0.0004038597 0.9974098 0.9989625
##Posterior predictive checks
# first make posterior predictions
pp_rare_phylo <- brms::posterior_predict(vet_rare_phylo)

log1 <- scale_x_continuous(trans="log1p")

ppc_dens_overlay(y=vet_rare_phylo$data$rare, pp_rare_phylo[1:50, ]) + scale_x_continuous(trans="log1p")

# RMSE
rmse_rare_phylo <- apply(pp_rare_phylo, 1, function(x) RMSE(x,vet_rare_phylo$data$rare))
plot(density(rmse_rare_phylo), main="RMSE across posterior predictions")

mean(rmse_rare_phylo)
## [1] 6.322799
sd(rmse_rare_phylo)
## [1] 0.7548457
# LOO
loo(vet_rare_phylo)
## Warning: Found 139 observations with a pareto_k > 0.7 in model
## 'vet_rare_phylo'. With this many problematic observations, it may be more
## appropriate to use 'kfold' with argument 'K = 10' to perform 10-fold cross-
## validation rather than LOO.
## 
## Computed from 5000 by 243 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo   -638.3 19.7
## p_loo       158.3  7.2
## looic      1276.6 39.4
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)      59   24.3%   563       
##  (0.5, 0.7]   (ok)        45   18.5%   189       
##    (0.7, 1]   (bad)      111   45.7%   20        
##    (1, Inf)   (very bad)  28   11.5%   9         
## See help('pareto-k-diagnostic') for details.

Naturalized model

Proportion naturalized species

if (!file.exists("./fit_nat_10k.rds")) {

  fit_nat <- brm(Naturalised | trials(species) ~ diversification_scaled + new_age_scaled + woodiness + herbaceous + climate.sum + species.hybrids + hermaphroditic + dioecious + monoecious + asexual + fleshy.fruit + animal + (1|family) + (1|family_name), 
    dat=data, family=binomial(), 
    iter=10000, thin=4, cov_ranef = list(family = phy_cov),
    control=list(adapt_delta=0.8, max_treedepth=11))

  saveRDS(fit_nat, "./fit_nat_10k.rds")

} else { fit_nat <- readRDS("./fit_nat_10k.rds")}

# fit_nat

outcome <- summary(fit_nat$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]

names1 <- c("Diversification rate","Famly age", "Woodiness","Herbaceous","Climate sum","Number of species hybrids","Hermaphroditic","Dioecious","Monoecious","Asexual","Fleshy fruit","Animal pollinated","SD brownian effects","SD family specific effects")

fit_nat_plot <- stanplot(fit_nat, pars=rownames(to_print), type="intervals", 
                    prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print)))+ ggtitle("Proportion of naturalized species")
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
fit_nat_plot

pdf("./plots_tables/fit_nat_plot.pdf", width=4, height=7)
fit_nat_plot
dev.off()
## png 
##   2
rownames(to_print) <- names1
kable(to_print, digits=2)
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
Diversification rate -1.65 0.00 0.33 -2.29 -1.88 -1.65 -1.42 -1.00 4580.08 1.00
Famly age -0.92 0.01 0.35 -1.60 -1.16 -0.92 -0.68 -0.22 4521.70 1.00
Woodiness -0.05 0.00 0.16 -0.37 -0.16 -0.04 0.06 0.27 4366.29 1.00
Herbaceous 0.95 0.00 0.26 0.45 0.78 0.95 1.12 1.46 4277.33 1.00
Climate sum 0.33 0.00 0.12 0.10 0.25 0.33 0.40 0.55 4231.76 1.00
Number of species hybrids 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 4635.04 1.00
Hermaphroditic 0.08 0.00 0.28 -0.47 -0.11 0.08 0.27 0.64 4162.15 1.00
Dioecious -0.11 0.00 0.19 -0.50 -0.24 -0.11 0.02 0.26 4204.36 1.00
Monoecious -0.17 0.00 0.19 -0.52 -0.29 -0.17 -0.04 0.22 4640.40 1.00
Asexual 0.40 0.00 0.20 0.00 0.27 0.40 0.54 0.80 4542.51 1.00
Fleshy fruit 0.00 0.00 0.17 -0.34 -0.11 0.00 0.11 0.33 4650.20 1.00
Animal pollinated -0.80 0.00 0.32 -1.43 -1.03 -0.80 -0.58 -0.17 4228.21 1.00
SD brownian effects 1.76 0.01 0.22 1.29 1.61 1.78 1.91 2.13 670.57 1.01
SD family specific effects 0.47 0.01 0.24 0.03 0.29 0.49 0.65 0.91 449.64 1.01
bayes_R2(fit_nat)
##     Estimate    Est.Error      Q2.5     Q97.5
## R2 0.9967621 0.0008056879 0.9948163 0.9979574
hyp <- paste("sd_family__Intercept^2 /", "(sd_family__Intercept^2 + sd_family_name__Intercept^2) = 0")
(hyp <- hypothesis(fit_nat, hyp, class = NULL))
## Hypothesis Tests for class :
##                 Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (sd_family__Inter... = 0     0.91      0.09     0.69        1         NA
##   Post.Prob Star
## 1        NA    *
## ---
## '*': The expected value under the hypothesis lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
plot(hyp)

##Posterior predictive checks
# first make posterior predictions
pp_nat <- brms::posterior_predict(fit_nat)

log1 <- scale_x_continuous(trans="log1p")

ppc_dens_overlay(y=fit_nat$data$Naturalised, pp_nat[1:50, ]) + scale_x_continuous(trans="log1p")

rmse_nat <- apply(pp_nat, 1, function(x) RMSE(x,fit_nat$data$Naturalised))
plot(density(rmse_nat), main="RMSE across posterior predictions")

mean(rmse_nat)
## [1] 7.183274
sd(rmse_nat)
## [1] 0.8720455
# LOO
loo(fit_nat)
## Warning: Found 180 observations with a pareto_k > 0.7 in model 'fit_nat'.
## With this many problematic observations, it may be more appropriate to use
## 'kfold' with argument 'K = 10' to perform 10-fold cross-validation rather
## than LOO.
## 
## Computed from 5000 by 357 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo   -788.4 30.5
## p_loo       207.0  9.5
## looic      1576.9 61.1
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)      77   21.6%   1108      
##  (0.5, 0.7]   (ok)       100   28.0%   203       
##    (0.7, 1]   (bad)      146   40.9%   16        
##    (1, Inf)   (very bad)  34    9.5%   8         
## See help('pareto-k-diagnostic') for details.

Combined Forest Plot

a <- vet_rare_plot + xlim(-4.5, 5)
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
b <- fit_nat_plot + xlim(-4.5, 5) + theme(axis.text.y=element_blank())
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
theme_set(theme_cowplot(font_size=12)) # reduce default font size
plot_grid(a, b, labels = c(' ', ' '), align='h', label_x=0.93, rel_widths=c(1,0.66))

ggsave("plots_tables/combined_forest_plot.png", width=8, height=5, units="in")
ggsave("plots_tables/combined_forest_plot.pdf", width=8, height=5, units="in")

Invasive model

Proportion invasive species

if (!file.exists("./fit_inv_10k.rds")) {

  fit_inv <- brm(Invasive | trials(species) ~ diversification_scaled + new_age_scaled + woodiness + herbaceous + climate.sum + species.hybrids + hermaphroditic + dioecious + monoecious + asexual + fleshy.fruit + animal + (1|family) + (1|family_name), 
    dat=data, family=binomial(), 
    iter=10000, thin=4, cov_ranef = list(family = phy_cov),
    control=list(adapt_delta=0.8, max_treedepth=11))

  saveRDS(fit_inv, "./fit_inv_10k.rds")

} else { fit_inv <- readRDS("./fit_inv_10k.rds")}

# fit_inv

outcome <- summary(fit_inv$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]

names1 <- c("Diversification rate","Famly age", "Woodiness","Herbaceous","Climate sum","Number of species hybrids","Hermaphroditic","Dioecious","Monoecious","Asexual","Fleshy fruit","Animal pollinated","SD brownian effects","SD family specific effects")

fit_inv_plot <- stanplot(fit_inv, pars=rownames(to_print), type="intervals", 
                    prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print)))+ ggtitle("Proportion of invasive species")
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
fit_inv_plot

pdf("./plots_tables/fit_inv_plot.pdf", width=4, height=7)
fit_inv_plot
dev.off()
## png 
##   2
rownames(to_print) <- names1
kable(to_print, digits=2)
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
Diversification rate -1.02 0.01 0.45 -1.87 -1.32 -1.03 -0.72 -0.12 4407.15 1
Famly age -1.30 0.01 0.43 -2.13 -1.60 -1.30 -1.02 -0.46 4580.42 1
Woodiness 0.21 0.00 0.19 -0.16 0.09 0.21 0.33 0.58 4970.14 1
Herbaceous 0.26 0.00 0.32 -0.37 0.04 0.26 0.47 0.89 4288.17 1
Climate sum 0.17 0.00 0.14 -0.11 0.07 0.17 0.26 0.43 4302.02 1
Number of species hybrids 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 4209.73 1
Hermaphroditic -0.19 0.00 0.34 -0.84 -0.41 -0.20 0.03 0.48 4791.70 1
Dioecious -0.09 0.00 0.21 -0.52 -0.23 -0.10 0.04 0.33 5011.49 1
Monoecious -0.10 0.00 0.22 -0.52 -0.25 -0.10 0.04 0.33 4295.29 1
Asexual 0.48 0.00 0.22 0.04 0.33 0.48 0.63 0.92 4926.11 1
Fleshy fruit -0.02 0.00 0.20 -0.40 -0.15 -0.02 0.11 0.36 4206.03 1
Animal pollinated -0.32 0.01 0.37 -1.03 -0.57 -0.32 -0.07 0.42 3641.29 1
SD brownian effects 1.18 0.02 0.43 0.20 0.91 1.23 1.51 1.87 637.45 1
SD family specific effects 0.70 0.01 0.28 0.09 0.51 0.74 0.91 1.16 631.77 1
bayes_R2(fit_inv)
##     Estimate   Est.Error      Q2.5     Q97.5
## R2 0.9858434 0.004242523 0.9752231 0.9916901
hyp <- paste("sd_family__Intercept^2 /", "(sd_family__Intercept^2 + sd_family_name__Intercept^2) = 0")
(hyp <- hypothesis(fit_inv, hyp, class = NULL))
## Hypothesis Tests for class :
##                 Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (sd_family__Inter... = 0     0.67      0.27     0.03        1         NA
##   Post.Prob Star
## 1        NA    *
## ---
## '*': The expected value under the hypothesis lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
plot(hyp)

##Posterior predictive checks
# first make posterior predictions
pp_inv <- brms::posterior_predict(fit_inv)

log1 <- scale_x_continuous(trans="log1p")

ppc_dens_overlay(y=fit_inv$data$Invasive, pp_inv[1:50, ]) + scale_x_continuous(trans="log1p")

rmse_inv <- apply(pp_inv, 1, function(x) RMSE(x,fit_inv$data$Invasive))
plot(density(rmse_inv), main="RMSE across posterior predictions")

mean(rmse_inv)
## [1] 3.023248
sd(rmse_inv)
## [1] 0.4356598
# LOO
loo(fit_inv)
## Warning: Found 114 observations with a pareto_k > 0.7 in model 'fit_inv'.
## With this many problematic observations, it may be more appropriate to use
## 'kfold' with argument 'K = 10' to perform 10-fold cross-validation rather
## than LOO.
## 
## Computed from 5000 by 357 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo   -468.6 28.1
## p_loo       133.3 10.4
## looic       937.3 56.2
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     156   43.7%   1042      
##  (0.5, 0.7]   (ok)        87   24.4%   140       
##    (0.7, 1]   (bad)       96   26.9%   27        
##    (1, Inf)   (very bad)  18    5.0%   3         
## See help('pareto-k-diagnostic') for details.

Sensitivity Analyses

Simple rarity model with quadratic age term

if (!file.exists("./vet_rare_naturalized_quad_age_10k.rds")) {

  vet_rare_nat_qa <- brm(rare | trials(vetted) ~ Naturalised_scaled + diversification_scaled + new_age_scaled + I(new_age_scaled^2) + (1|family) + (1|family_name), 
    dat=data_vetted, family=binomial(), 
    iter=10000, thin=4, cov_ranef = list(family = phy_cov_vetted),
    control=list(adapt_delta=0.95,max_treedepth=12), cores=4)

  saveRDS(vet_rare_nat_qa, "./vet_rare_naturalized_quad_age_10k.rds")

} else { vet_rare_nat_qa <- readRDS("./vet_rare_naturalized_quad_age_10k.rds")}

# vet_rare_nat_qa
# pairs(vet_rare_nat_qa)

outcome <- summary(vet_rare_nat_qa$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]

names1 <- c("Number of naturalized species","Diversification rate","Family age", "Family Age ^2","SD brownian effects","SD family specific effects")

vet_rare_nat_qa_plot <- stanplot(vet_rare_nat_qa, pars=rownames(to_print), type="intervals", 
                    prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print)))
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
vet_rare_nat_qa_plot

pdf("./plots_tables/vet_rare_nat_qa_plot.pdf", width=4, height=3)
vet_rare_nat_qa_plot
dev.off()
## png 
##   2
rownames(to_print) <- names1
kable(to_print, digits=2)
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
Number of naturalized species -0.73 0.00 0.20 -1.12 -0.86 -0.73 -0.60 -0.36 4409.60 1
Diversification rate 1.25 0.00 0.33 0.62 1.03 1.25 1.47 1.90 4692.22 1
Family age 1.03 0.00 0.32 0.40 0.82 1.03 1.25 1.67 4623.45 1
Family Age ^2 -0.21 0.00 0.29 -0.80 -0.41 -0.21 -0.01 0.37 4802.55 1
SD brownian effects 0.98 0.01 0.20 0.59 0.84 0.98 1.12 1.38 1338.77 1
SD family specific effects 0.64 0.00 0.14 0.32 0.56 0.65 0.74 0.88 1316.53 1
bayes_R2(vet_rare_nat_qa)
##     Estimate    Est.Error     Q2.5     Q97.5
## R2 0.9983417 0.0003915706 0.997416 0.9989467
##Posterior predictive checks
# first make posterior predictions
pp_vet_rare_nat_qa <- brms::posterior_predict(vet_rare_nat_qa)

log1 <- scale_x_continuous(trans="log1p")

ppc_dens_overlay(y=vet_rare_nat_qa$data$rare, pp_vet_rare_nat_qa[1:50, ]) + scale_x_continuous(trans="log1p")

# RMSE
rmse_vet_rare_nat_qa <- apply(pp_vet_rare_nat_qa, 1, function(x) RMSE(x,vet_rare_nat_qa$data$rare))
plot(density(rmse_vet_rare_nat_qa), main="RMSE across posterior predictions")

mean(rmse_vet_rare_nat_qa)
## [1] 6.171456
sd(rmse_vet_rare_nat_qa)
## [1] 0.7207039
# LOO
loo(vet_rare_nat_qa)
## Warning: Found 153 observations with a pareto_k > 0.7 in model
## 'vet_rare_nat_qa'. With this many problematic observations, it may be more
## appropriate to use 'kfold' with argument 'K = 10' to perform 10-fold cross-
## validation rather than LOO.
## 
## Computed from 5000 by 259 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo   -668.1 18.8
## p_loo       161.2  5.8
## looic      1336.3 37.6
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)      55   21.2%   483       
##  (0.5, 0.7]   (ok)        51   19.7%   214       
##    (0.7, 1]   (bad)      132   51.0%   22        
##    (1, Inf)   (very bad)  21    8.1%   12        
## See help('pareto-k-diagnostic') for details.

IUCN vetted full rarity model with quadratic term

if (!file.exists("./vet_rare_quad_age_10k.rds")) {

  vet_rare_qa <- brm(rare | trials(vetted) ~ diversification_scaled + new_age_scaled + I(new_age_scaled^2) + woodiness + herbaceous + climate.sum + species.hybrids + hermaphroditic + dioecious + monoecious + asexual + fleshy.fruit + animal + (1|family) + (1|family_name), 
    dat=data_vetted, family=binomial(), 
    iter=10000, thin=4, cov_ranef = list(family = phy_cov_vetted),
    control=list(adapt_delta=0.95,max_treedepth=13))

  saveRDS(vet_rare_qa, "./vet_rare_quad_age_10k.rds")

} else { vet_rare_qa <- readRDS("./vet_rare_quad_age_10k.rds")}

# vet_rare_qa

outcome <- summary(vet_rare_qa$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]

names1 <- c("Diversification rate","Famly age", "Family Age ^2", "Woodiness","Herbaceous","Climate sum","Number of species hybrids","Hermaphroditic","Dioecious","Monoecious","Asexual","Fleshy fruit","Animal pollinated","SD brownian effects","SD family specific effects")

vet_rare_qa_plot <- stanplot(vet_rare_qa, pars=rownames(to_print), type="intervals", 
                    prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print))) + ggtitle("Proportion rare species")
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
vet_rare_qa_plot

pdf("./plots_tables/vet_rare_qa_plot.pdf", width=4, height=7)
vet_rare_qa_plot
dev.off()
## png 
##   2
rownames(to_print) <- names1
kable(to_print, digits=2)
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
Diversification rate 0.46 0.01 0.35 -0.21 0.23 0.46 0.69 1.15 3961.81 1
Famly age 0.40 0.00 0.32 -0.22 0.19 0.40 0.62 1.03 4157.17 1
Family Age ^2 0.00 0.00 0.28 -0.54 -0.19 0.00 0.19 0.55 4319.58 1
Woodiness 0.63 0.00 0.14 0.35 0.54 0.63 0.72 0.91 4335.46 1
Herbaceous -1.08 0.00 0.23 -1.54 -1.23 -1.08 -0.93 -0.61 3203.40 1
Climate sum -0.28 0.00 0.10 -0.47 -0.34 -0.28 -0.21 -0.09 4217.03 1
Number of species hybrids 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 4796.10 1
Hermaphroditic 0.16 0.00 0.23 -0.29 0.01 0.16 0.31 0.62 4124.98 1
Dioecious -0.15 0.00 0.16 -0.46 -0.26 -0.15 -0.05 0.16 4830.01 1
Monoecious -0.12 0.00 0.16 -0.44 -0.23 -0.12 0.00 0.21 4678.97 1
Asexual 0.02 0.00 0.17 -0.30 -0.09 0.02 0.13 0.36 4749.06 1
Fleshy fruit 0.04 0.00 0.16 -0.26 -0.06 0.04 0.15 0.36 3927.81 1
Animal pollinated 0.59 0.00 0.25 0.11 0.41 0.59 0.76 1.08 4061.07 1
SD brownian effects 0.55 0.01 0.28 0.04 0.34 0.56 0.75 1.10 643.91 1
SD family specific effects 0.74 0.00 0.12 0.47 0.67 0.75 0.82 0.94 850.53 1
bayes_R2(vet_rare_qa)
##     Estimate    Est.Error      Q2.5     Q97.5
## R2 0.9983654 0.0004019589 0.9973731 0.9989681
##Posterior predictive checks
# first make posterior predictions
pp_rare_qa <- brms::posterior_predict(vet_rare_qa)

log1 <- scale_x_continuous(trans="log1p")

ppc_dens_overlay(y=vet_rare_qa$data$rare, pp_rare_qa[1:50, ]) + scale_x_continuous(trans="log1p")

# RMSE
rmse_vet_qa <- apply(pp_rare_qa, 1, function(x) RMSE(x,vet_rare_qa$data$rare))
plot(density(rmse_vet_qa), main="RMSE across posterior predictions")

mean(rmse_vet_qa)
## [1] 6.306899
sd(rmse_vet_qa)
## [1] 0.7333391
# LOO
loo(vet_rare)
## Warning: Found 131 observations with a pareto_k > 0.7 in model 'vet_rare'.
## With this many problematic observations, it may be more appropriate to use
## 'kfold' with argument 'K = 10' to perform 10-fold cross-validation rather
## than LOO.
## 
## Computed from 5000 by 243 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo   -633.7 19.3
## p_loo       155.6  6.6
## looic      1267.4 38.5
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)      53   21.8%   1160      
##  (0.5, 0.7]   (ok)        59   24.3%   209       
##    (0.7, 1]   (bad)      107   44.0%   21        
##    (1, Inf)   (very bad)  24    9.9%   5         
## See help('pareto-k-diagnostic') for details.

IUCN non-vetted full rarity model

if (!file.exists("./nonvet_rare_10k.rds")) {

  nonvet_rare <- brm(rare | trials(species) ~ diversification_scaled + new_age_scaled + woodiness + herbaceous + climate.sum + species.hybrids + hermaphroditic + dioecious + monoecious + asexual + fleshy.fruit + animal + (1|family) + (1|family_name), 
    dat=data, family=binomial(), 
    iter=10000, thin=4, cov_ranef = list(family = phy_cov),
    control=list(adapt_delta=0.95,max_treedepth=13))

  saveRDS(nonvet_rare, "./nonvet_rare_10k.rds")

} else { nonvet_rare <- readRDS("./nonvet_rare_10k.rds")}

# nonvet_rare

outcome <- summary(nonvet_rare$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]

names1 <- c("Diversification rate","Famly age", "Woodiness","Herbaceous","Climate sum","Number of species hybrids","Hermaphroditic","Dioecious","Monoecious","Asexual","Fleshy fruit","Animal pollinated","SD brownian effects","SD family specific effects")

nonvet_rare_plot <- stanplot(nonvet_rare, pars=rownames(to_print), type="intervals", 
                    prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print))) + ggtitle("Proportion rare species")
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
nonvet_rare_plot

pdf("./plots_tables/nonvet_rare_plot.pdf", width=4, height=7)
nonvet_rare_plot
dev.off()
## png 
##   2
rownames(to_print) <- names1
kable(to_print, digits=2)
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
Diversification rate 0.68 0.01 0.36 0.00 0.44 0.68 0.92 1.42 4441.99 1.00
Famly age 0.50 0.00 0.32 -0.10 0.29 0.50 0.71 1.16 4552.94 1.00
Woodiness -0.42 0.00 0.16 -0.74 -0.53 -0.42 -0.31 -0.11 3988.19 1.00
Herbaceous -0.60 0.00 0.25 -1.11 -0.77 -0.59 -0.43 -0.10 3551.10 1.00
Climate sum -0.19 0.00 0.11 -0.41 -0.27 -0.19 -0.12 0.02 4535.94 1.00
Number of species hybrids 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 4766.30 1.00
Hermaphroditic -0.12 0.00 0.27 -0.63 -0.29 -0.12 0.07 0.41 4368.39 1.00
Dioecious -0.19 0.00 0.19 -0.56 -0.32 -0.19 -0.07 0.18 4424.59 1.00
Monoecious 0.08 0.00 0.19 -0.29 -0.04 0.08 0.21 0.44 4441.74 1.00
Asexual 0.15 0.00 0.21 -0.25 0.01 0.15 0.29 0.55 4520.87 1.00
Fleshy fruit 0.24 0.00 0.18 -0.13 0.11 0.23 0.36 0.59 4577.38 1.00
Animal pollinated 0.16 0.00 0.31 -0.44 -0.05 0.16 0.37 0.76 4464.41 1.00
SD brownian effects 0.69 0.01 0.31 0.08 0.47 0.70 0.91 1.28 472.67 1.01
SD family specific effects 1.07 0.00 0.12 0.80 0.99 1.07 1.15 1.29 794.26 1.01
bayes_R2(nonvet_rare)
##     Estimate    Est.Error      Q2.5     Q97.5
## R2 0.9964116 0.0007896307 0.9946255 0.9976707
##Posterior predictive checks
# first make posterior predictions
pp_rare <- brms::posterior_predict(nonvet_rare)

log1 <- scale_x_continuous(trans="log1p")

ppc_dens_overlay(y=nonvet_rare$data$rare, pp_rare[1:50, ]) + scale_x_continuous(trans="log1p")

# RMSE
rmse_nonvet <- apply(pp_rare, 1, function(x) RMSE(x,nonvet_rare$data$rare))
plot(density(rmse_nonvet), main="RMSE across posterior predictions")

mean(rmse_nonvet)
## [1] 7.977609
sd(rmse_nonvet)
## [1] 0.8493576
# LOO
loo(nonvet_rare)
## Warning: Found 192 observations with a pareto_k > 0.7 in model
## 'nonvet_rare'. With this many problematic observations, it may be more
## appropriate to use 'kfold' with argument 'K = 10' to perform 10-fold cross-
## validation rather than LOO.
## 
## Computed from 5000 by 357 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo   -816.6 31.4
## p_loo       208.9  9.6
## looic      1633.3 62.9
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)      88   24.6%   599       
##  (0.5, 0.7]   (ok)        77   21.6%   112       
##    (0.7, 1]   (bad)      161   45.1%   20        
##    (1, Inf)   (very bad)  31    8.7%   4         
## See help('pareto-k-diagnostic') for details.

IUCN non-vetted full rarity model with quadratic age term

if (!file.exists("./nonvet_rare_qa_10k.rds")) {

  nonvet_rare_qa <- brm(rare | trials(species) ~ diversification_scaled + new_age_scaled + I(new_age_scaled^2) + woodiness + herbaceous + climate.sum + species.hybrids + hermaphroditic + dioecious + monoecious + asexual + fleshy.fruit + animal + (1|family) + (1|family_name), 
    dat=data, family=binomial(), 
    iter=10000, thin=4, cov_ranef = list(family = phy_cov),
    control=list(adapt_delta=0.95,max_treedepth=13))

  saveRDS(nonvet_rare_qa, "./nonvet_rare_qa_10k.rds")

} else { nonvet_rare_qa <- readRDS("./nonvet_rare_qa_10k.rds")}

# nonvet_rare_qa

outcome <- summary(nonvet_rare_qa$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]

names1 <- c("Diversification rate","Famly age", "Family Age ^2", "Woodiness","Herbaceous","Climate sum","Number of species hybrids","Hermaphroditic","Dioecious","Monoecious","Asexual","Fleshy fruit","Animal pollinated","SD brownian effects","SD family specific effects")

nonvet_rare_qa_plot <- stanplot(nonvet_rare_qa, pars=rownames(to_print), type="intervals", 
                    prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print))) + ggtitle("Proportion rare species")
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
nonvet_rare_qa_plot

pdf("./plots_tables/nonvet_rare_qa_plot.pdf", width=4, height=7)
nonvet_rare_qa_plot
dev.off()
## png 
##   2
rownames(to_print) <- names1
kable(to_print, digits=2)
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
Diversification rate 0.74 0.01 0.37 0.02 0.49 0.74 0.98 1.47 4071.21 1
Famly age 0.52 0.00 0.32 -0.11 0.31 0.52 0.73 1.16 4734.89 1
Family Age ^2 -0.27 0.00 0.33 -0.92 -0.49 -0.27 -0.04 0.37 4443.68 1
Woodiness -0.43 0.00 0.16 -0.74 -0.54 -0.43 -0.32 -0.10 4232.39 1
Herbaceous -0.60 0.00 0.26 -1.10 -0.77 -0.60 -0.43 -0.10 3301.34 1
Climate sum -0.20 0.00 0.11 -0.41 -0.27 -0.20 -0.12 0.02 4702.55 1
Number of species hybrids 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 4651.11 1
Hermaphroditic -0.14 0.00 0.27 -0.65 -0.32 -0.13 0.04 0.38 4728.11 1
Dioecious -0.21 0.00 0.19 -0.58 -0.33 -0.21 -0.08 0.17 4027.80 1
Monoecious 0.07 0.00 0.19 -0.30 -0.06 0.07 0.19 0.44 4394.00 1
Asexual 0.14 0.00 0.21 -0.25 0.00 0.14 0.29 0.55 4146.01 1
Fleshy fruit 0.26 0.00 0.18 -0.11 0.13 0.26 0.39 0.61 4615.45 1
Animal pollinated 0.14 0.00 0.31 -0.45 -0.06 0.14 0.34 0.74 4548.91 1
SD brownian effects 0.69 0.01 0.30 0.08 0.49 0.69 0.90 1.28 645.05 1
SD family specific effects 1.07 0.00 0.12 0.82 1.00 1.08 1.16 1.29 994.13 1
bayes_R2(nonvet_rare_qa)
##     Estimate   Est.Error      Q2.5     Q97.5
## R2 0.9964198 0.000800492 0.9945435 0.9977011
##Posterior predictive checks
# first make posterior predictions
pp_rare <- brms::posterior_predict(nonvet_rare_qa)

log1 <- scale_x_continuous(trans="log1p")

ppc_dens_overlay(y=nonvet_rare_qa$data$rare, pp_rare[1:50, ]) + scale_x_continuous(trans="log1p")

# RMSE
rmse_nonvet_qa <- apply(pp_rare, 1, function(x) RMSE(x,nonvet_rare_qa$data$rare))
plot(density(rmse_nonvet_qa), main="RMSE across posterior predictions")

mean(rmse_nonvet_qa)
## [1] 7.964437
sd(rmse_nonvet_qa)
## [1] 0.870164
# LOO
loo(nonvet_rare_qa)
## Warning: Found 198 observations with a pareto_k > 0.7 in model
## 'nonvet_rare_qa'. With this many problematic observations, it may be more
## appropriate to use 'kfold' with argument 'K = 10' to perform 10-fold cross-
## validation rather than LOO.
## 
## Computed from 5000 by 357 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo   -817.7 31.8
## p_loo       210.6 10.1
## looic      1635.4 63.6
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)      86   24.1%   557       
##  (0.5, 0.7]   (ok)        73   20.4%   189       
##    (0.7, 1]   (bad)      166   46.5%   32        
##    (1, Inf)   (very bad)  32    9.0%   3         
## See help('pareto-k-diagnostic') for details.

With clade age + species, but no diversification

if (!file.exists("./vet_rare_naturalized_spec_age_10k.rds")) {

  vet_rare_nat_v2 <- brm(rare | trials(vetted) ~ Naturalised_scaled + species_scaled + new_age_scaled +  (1|family) + (1|family_name), 
    dat=data_vetted, family=binomial(), 
    iter=10000, thin=4, cov_ranef = list(family = phy_cov_vetted),
    control=list(adapt_delta=0.95,max_treedepth=12), cores=4)

  saveRDS(vet_rare_nat_v2, "./vet_rare_naturalized_spec_age_10k.rds")

} else { vet_rare_nat_v2 <- readRDS("./vet_rare_naturalized_spec_age_10k.rds")}

# vet_rare_nat_v2
# pairs(vet_rare_nat_v2)

outcome <- summary(vet_rare_nat_v2$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]

names1 <- c("Number of naturalized species","Number of species","Family age","SD brownian effects","SD family specific effects")

vet_rare_nat_v2_plot <- stanplot(vet_rare_nat_v2, pars=rownames(to_print), type="intervals", 
                    prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print)))

vet_rare_nat_v2_plot

pdf("./plots_tables/vet_rare_nat_v2_plot.pdf", width=4, height=3)
vet_rare_nat_v2_plot
dev.off()

rownames(to_print) <- names1
kable(to_print, digits=2)

bayes_R2(vet_rare_nat_v2)

##Posterior predictive checks
# first make posterior predictions
pp_vet_rare_nat_v2 <- brms::posterior_predict(vet_rare_nat_v2)

log1 <- scale_x_continuous(trans="log1p")

ppc_dens_overlay(y=vet_rare_nat_v2$data$rare, pp_vet_rare_nat_v2[1:50, ]) + scale_x_continuous(trans="log1p")


# RMSE
rmse_vet_rare_nat_v2 <- apply(pp_vet_rare_nat_v2, 1, function(x) RMSE(x,vet_rare_nat_v2$data$rare))
plot(density(rmse_vet_rare_nat_v2), main="RMSE across posterior predictions")
mean(rmse_vet_rare_nat_v2)
sd(rmse_vet_rare_nat_v2)

With diversification, clade age, and species

if (!file.exists("./vet_rare_naturalized_spec_age_div_10k.rds")) {

  vet_rare_nat_v3 <- brm(rare | trials(vetted) ~ Naturalised_scaled + 
    diversification_scaled + species_scaled + new_age_scaled +  (1|family) + (1|family_name), 
    dat=data_vetted, family=binomial(), 
    iter=10000, thin=4, cov_ranef = list(family = phy_cov_vetted),
    control=list(adapt_delta=0.95,max_treedepth=12), cores=4)

  saveRDS(vet_rare_nat_v3, "./vet_rare_naturalized_spec_age_div_10k.rds")

} else { vet_rare_nat_v3 <- readRDS("./vet_rare_naturalized_spec_age_div_10k.rds")}

# vet_rare_nat_v3
# pairs(vet_rare_nat_v3)

outcome <- summary(vet_rare_nat_v3$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]

names1 <- c("Number of naturalized species","Diversification rate","Number of species","Family age","SD brownian effects","SD family specific effects")

vet_rare_nat_v3_plot <- stanplot(vet_rare_nat_v3, pars=rownames(to_print), type="intervals", 
                    prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print)))

vet_rare_nat_v3_plot

pdf("./plots_tables/vet_rare_nat_v3_plot.pdf", width=4, height=3)
vet_rare_nat_v3_plot
dev.off()

rownames(to_print) <- names1
kable(to_print, digits=2)

bayes_R2(vet_rare_nat_v3)

##Posterior predictive checks
# first make posterior predictions
pp_vet_rare_nat_v3 <- brms::posterior_predict(vet_rare_nat_v3)

log1 <- scale_x_continuous(trans="log1p")

ppc_dens_overlay(y=vet_rare_nat_v3$data$rare, pp_vet_rare_nat_v3[1:50, ]) + scale_x_continuous(trans="log1p")


# RMSE
rmse_vet_rare_nat_v3 <- apply(pp_vet_rare_nat_v3, 1, function(x) RMSE(x,vet_rare_nat_v3$data$rare))
plot(density(rmse_vet_rare_nat_v3), main="RMSE across posterior predictions")
mean(rmse_vet_rare_nat_v3)
sd(rmse_vet_rare_nat_v3)